{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "unusual-profile",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "romantic-provider",
   "metadata": {},
   "outputs": [],
   "source": [
    "IO_NET_sinks = ['java.io',  'SQL', 'okhttp3', 'SharedPreferences']\n",
    "\n",
    "interested_var = {'java.io' : 0,\n",
    "                 'write': 0,\n",
    "                 'SQL': -10, # should be params\n",
    "                 'okhttp3': -10,\n",
    "                 'SharedPreferences': 1}\n",
    "\n",
    "loose_compare = [' match',\n",
    "                ' contains']\n",
    "\n",
    "strict_compare = [' startsWith',\n",
    "                 ' endsWith',\n",
    "                 'indexOf']\n",
    "\n",
    "text_extract = [' opt',\n",
    "               ' substring']\n",
    "\n",
    "\n",
    "def parse_result(path):\n",
    "    f = open(path)\n",
    "    l = []\n",
    "    for line in f.readlines():\n",
    "        l.append(line)\n",
    "    f.close()\n",
    "    ss = ''.join(l).replace('\\n', ',')\n",
    "    ss = '[' + ss[:-1] + ']'\n",
    "    return json.loads(ss)\n",
    "\n",
    "def is_interest_sink(mthd):\n",
    "    for s in IO_NET_sinks:\n",
    "        if s in mthd:\n",
    "            return s\n",
    "        \n",
    "    return False\n",
    "\n",
    "def get_tv_pos(unit, tv): # 0,1,2: pos; -1: None\n",
    "    s = tv.find('[')\n",
    "    if s >= 0:\n",
    "        tv_list = tv[tv.find('[') + 1 : tv.rfind(']')]\n",
    "        tv_list = [s.strip for s in tv_list.split(',')]\n",
    "        rp = unit.rfind(')')\n",
    "        lp = unit.find(')>(') + 3\n",
    "        s = unit[lp : rp]\n",
    "        param_list = [s.strip() for s in s.split(',')]\n",
    "\n",
    "        tv_pos = []\n",
    "\n",
    "        for i in range(len(param_list)):\n",
    "            if param_list[i] in tv:\n",
    "                tv_pos.append(i)\n",
    "        if len(tv_pos) > 0:\n",
    "            return tv_pos\n",
    "        return [-1,]\n",
    "    else:\n",
    "        rp = unit.rfind(')')\n",
    "        lp = unit.find(')>(') + 3\n",
    "        s = unit[lp : rp]\n",
    "        param_list = [s.strip() for s in s.split(',')]\n",
    "    #     print(param_list)\n",
    "        for i in range(len(param_list)):\n",
    "            if param_list[i] == tv:\n",
    "                return [i,]\n",
    "        return [-1,]\n",
    "\n",
    "def deduplicate(json_list):\n",
    "    dd_json = []\n",
    "    dd_json_set = set()\n",
    "    for js in json_list:\n",
    "        if str(js) in dd_json_set:\n",
    "            pass\n",
    "        else:\n",
    "            dd_json.append(js)\n",
    "            dd_json_set.add(str(js))\n",
    "    \n",
    "    return dd_json\n",
    "\n",
    "\n",
    "def is_interest_apk(p):\n",
    "    json_list = parse_result(p)\n",
    "    \n",
    "    for js in json_list:\n",
    "        low_source = js['source']['unit'].lower()\n",
    "        if 'org.json.jsonobject' in low_source and 'optstring' in low_source or 'java.util.map' in low_source and 'get(' in low_source:\n",
    "            continue\n",
    "        rec_sinks = js['sinks']\n",
    "        for s in rec_sinks:\n",
    "            if is_interest_sink(s['unit']):\n",
    "                return deduplicate(json_list)\n",
    "        else:\n",
    "            continue\n",
    "    return None\n",
    "\n",
    "def get_mthd_type(s):\n",
    "    u = s['unit']\n",
    "    mthd = u[u.find('.<') + 2 : u.rfind(')>(') + 1]\n",
    "    for lc in loose_compare:\n",
    "        if lc in mthd:\n",
    "            return 0\n",
    "    \n",
    "    for sc in strict_compare:\n",
    "        if sc in mthd:\n",
    "            return 1\n",
    "    \n",
    "    for te in text_extract:\n",
    "        if te in mthd:\n",
    "            return 2\n",
    "    \n",
    "    return -1\n",
    "\n",
    "def eval_type(m):\n",
    "    if m[1] and m[2]: return 4 # strict format + text extract\n",
    "    if m[0] and m[2]: return 3  # loose format + text extract\n",
    "    if m[1]: return 2  # Strict format only\n",
    "    if m[0]: return 1  # loose format only\n",
    "    return 0  # No validate\n",
    "\n",
    "def get_result(json_list):\n",
    "    \n",
    "    group_ori_source = {}\n",
    "\n",
    "    for r in json_list[::-1]:# record {'sinks':..., 'source': ...}\n",
    "        osource = r['originsource']['unit']\n",
    "        group_ori_source.setdefault(osource, list([])).append(r)\n",
    "\n",
    "    exist_type = np.zeros(5)\n",
    "\n",
    "    for r in group_ori_source.values():\n",
    "        r_mthd_type = [0, 0, 0]\n",
    "        for record in r:\n",
    "            if 'android.content.ClipboardManager' in record['source']['unit']:\n",
    "                r_mthd_type = [0, 0, 0]\n",
    "            for s in record['sinks']:\n",
    "\n",
    "                u = s['unit']\n",
    "                mtype = get_mthd_type(s)\n",
    "                if mtype >= 0:\n",
    "                    r_mthd_type[mtype] = 1\n",
    "                    continue\n",
    "\n",
    "                mthd = is_interest_sink(u)\n",
    "                if not mthd: continue\n",
    "                tv = s['taint_var']\n",
    "                vpos = get_tv_pos(u, tv)\n",
    "                if interested_var[mthd] in vpos or interested_var[mthd] == -10 and max(vpos) >= 0:\n",
    "                    exist_type[eval_type(r_mthd_type)] = 1\n",
    "\n",
    "\n",
    "    idx = 0\n",
    "    while sum(exist_type) > 1:\n",
    "        exist_type[idx] = 0\n",
    "        idx += 1\n",
    "\n",
    "    if sum(exist_type) == 1:\n",
    "        return exist_type.argmax()\n",
    "    else: return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "appointed-delta",
   "metadata": {},
   "outputs": [],
   "source": [
    "apk_path = 'res/' # path of the output result of the jar\n",
    "apk_list = set([f for dp, dn, filenames in os.walk(apk_path) for f in filenames])\n",
    "\n",
    "result = np.zeros(5)\n",
    "for t in apk_list:\n",
    "    if t == 'appWithCB' or t == 'TimeOuts.txt' or t.startswith('.'): continue\n",
    "    _ = is_interest_apk(apk_path + t)\n",
    "    if _:\n",
    "        d = get_result(_)\n",
    "        if d is not None:\n",
    "            result[d] += 1\n",
    "\n",
    "result        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "direct-there",
   "metadata": {},
   "outputs": [],
   "source": [
    "apk_path = '' # path of the output result of the jar\n",
    "\n",
    "_ = is_interest_apk(apk_path)\n",
    "if _:\n",
    "    print(get_result(_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "therapeutic-cheese",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
